* This program creates Baseline Balance (table 1) for wave 2, as well as assorted other
* appendix tables that include additional variables/wave1 balance.

* Start from an empty session and turn off --more-- pauses so the script runs unattended.
clear all
set more off

* Project root; every input (Data/) and output (Output/) path below is built from it.
local folder "C:\Users\mlevere\OneDrive - Mathematica\Documents\Projects\Nepal\"

* mysvyregress varlist [if] [in], by(varname) [options]
*
* For each variable in varlist, runs -svy: regress var by- on the requested
* sample and collects e(p) from each regression into a 1 x k row vector.
*
* Stored results (added on top of the e() results of the last regression):
*   e(cmd)  "mysvyregress"
*   e(d_p)  row vector of the collected e(p) values; columns named after varlist
*
* Notes:
*   - The data must already be -svyset-.
*   - Each regression uses its own listwise sample: an observation missing one
*     outcome is still used for the other outcomes.
*   - Extra options are accepted for syntax compatibility but are not passed on
*     to -regress-.
program mysvyregress, eclass
	syntax varlist [if] [in], by(varname) [*]

	// Build the sample marker from if/in only (novarlist) so a missing value in
	// one outcome does not drop the observation from the other regressions;
	// then exclude observations with a missing grouping variable.
	marksample touse, novarlist
	markout `touse' `by'

	tempname d_p
	foreach var of local varlist {
		// Restrict with the marker rather than the raw `if' so that an
		// -in- range supplied by the caller is honoured as well.
		quietly svy: regress `var' `by' if `touse'
		matrix `d_p' = nullmat(`d_p'), e(p)
	}
	matrix colnames `d_p' = `varlist'

	ereturn local cmd "mysvyregress"
	ereturn matrix d_p = `d_p'
end

*capt log close
*log using "table01_baselinebalance.log", replace

/* Wave 2 households only: Women variables first
   1. Woman Age
   2. Ever Attended School
   3. Knowledge Index
   4. Fed child non-breast milk within the first 3 days of birth

   Writes the first panel of the balance tables (means by arm and p-values)
   with -replace-, and saves a household-level file of sample indicators in
   the tempfile `hh_data_matching' for use by the household section. */
use "`folder'/Data/baseline_raw_women", clear

merge m:1 district vdc_name using "`folder'/Data/wave1_vdcs.dta"

// Define the full sample
gen full_sample = 1

// Define the potential endline sample: records whose district/VDC is not in the wave 1 VDC list
gen endline_sample = _merge == 1
drop _merge

merge m:1 www hh using "`folder'/Data/baseline_flag_hh", keep(3) nogen

/* Create variables.
   Note: each indicator below is the result of a logical expression, so a
   missing response is coded 0 rather than missing. */
gen ever_school = school == 1

gen exclusive_6mo = knowledge_exclusivebreastfeed == 2
gen preg_more = knowledge_pregnant_food == 1
gen diarrhea_more = knowledge_breastfeed_diahrrea == 3
gen givebaby_firstmilk = knowledge_firstmilk == 1
gen feed_6t11 = knowledge_6t11_fed >= 2 & knowledge_6t11_fed <= 4
gen snacks_12t24 = knowledge_12t24_snacks >= 4 & knowledge_12t24_snacks <= 6
gen iron_3t4 = knowledge_iron_supp == 3 | knowledge_iron_supp == 4
gen deworming_2ndtri = knowledge_deworming >= 4 & knowledge_deworming <= 7
gen vitaminA_45 = knowledge_vitaminA <= 45
gen anc_checkups_4 = knowledge_anc_checkups == 4

// Knowledge index: sum of the ten 0/1 indicators above
gen knowledge_index = givebaby_firstmilk + exclusive_6mo + feed_6t11 + snacks_12t24 + diarrhea_more + iron_3t4 + deworming_2ndtri + vitaminA_45 + preg_more + anc_checkups_4

// This indicator, unlike those above, is left missing when the source item is missing
gen threedays_nonbreastmilk = youngest_3days_nonbreastmilk == 1
replace threedays_nonbreastmilk = . if youngest_3days_nonbreastmilk == .

/* Drop outliers on expenditures/income */
drop if flag_spending == 1 | flag_missingwomen == 1

// Define the endline analysis sample as those who match to having endline data
merge 1:m www hh idc using "`folder'/Data/women_endline_reg_data", keepusing(www hh idc flag)
gen endline_data = _merge == 3 & flag == 0
drop if _merge == 2
drop _merge flag

// A 1:m merge repeats a woman once per matching endline record. Keep one row
// per woman (the one with the largest endline_data, i.e. 1 if any matched
// record is unflagged) so the next 1:m merge has a unique master and each
// woman is counted once in the balance table. Same de-duplication step as the
// child section.
bysort www hh idc (endline_data): keep if _n == _N

// Define the follow-up analysis sample as those who match to having follow-up data
merge 1:m www hh idc using "`folder'/Data/women_follow_up_reg_data", keepusing(www hh idc flag)
gen follow_up_data = _merge == 3 & flag == 0 & endline_sample == 1
drop if _merge == 2
drop _merge flag

// Same de-duplication after the follow-up merge
bysort www hh idc (follow_up_data): keep if _n == _N

tab endline_data
tab follow_up_data

local sumvars "age_years ever_school knowledge_index threedays_nonbreastmilk"

keep www hh control_vdc info_vdc cash_vdc vdc_code co_code district treatment info_control cash_control cash_info endline_data follow_up_data `sumvars'

// Survey design: vdc_code as the sampling unit, stratified by district
svyset vdc_code, strata(district)

label variable age_years "Mother Age"
label variable ever_school "Mother Attended School"
label variable knowledge_index "Knowledge Index"
label variable threedays_nonbreastmilk "Fed non-breastmilk w-in 3 days"

foreach wave in endline follow_up {

	// Output file number: table 1 for the endline sample, A2 for follow-up
	if "`wave'" == "endline" local tblnum "1"
	if "`wave'" == "follow_up" local tblnum "A2"

	// Means by treatment arm (treatment == 1, 2, 3)
	estpost su `sumvars' if treatment == 1 & `wave'_data == 1
	est store A

	estpost su `sumvars' if treatment == 2 & `wave'_data == 1
	est store B

	estpost su `sumvars' if treatment == 3 & `wave'_data == 1
	est store C

		esttab A B C using "`folder'/Output/table`tblnum'_baselinebalance_`wave'.tex", replace ///
			mtitle("Control" "Info Only" "Info + Cash") ///
			cells((mean(fmt(2)))) noobs  label booktabs nonum collabels(none) gaps f plain   

			
	// p-values from regressing each variable on the comparison indicator
	mysvyregress `sumvars' if `wave'_data == 1, by(info_control)
	est store E

	mysvyregress `sumvars' if `wave'_data == 1, by(cash_control)
	est store F

		// Two models, so two titles. The previous three arm titles labelled the
		// p-value columns "Control" and "Info Only".
		// NOTE(review): titles assume info_control / cash_control are the
		// Info Only vs. Control and Info + Cash vs. Control indicators -- confirm.
		esttab E F using "`folder'/Output/table`tblnum'_baselinebalance_pvals_`wave'.tex", replace ///
			mtitle("Info Only vs. Control" "Info + Cash vs. Control") ///
			cells(d_p(par fmt(2))) noobs  label booktabs nonum collabels(none) gaps f plain   

	tab treatment if `wave'_data == 1

}

// Keep the endline/follow-up indicators, taking the max across the household
local samps "endline_data follow_up_data"

keep www hh `samps'

collapse (max) `samps', by(www hh)

tempfile hh_data_matching
save "`hh_data_matching'", replace



/* Child Variables:
   1. Child Age
   2. Child Underweight
   3. Child Stunted
   4. Child Wasted

   Appends the child panel to the balance tables written by the women section. */

use "`folder'/Data/baseline_raw_child_z_rc", clear
merge m:1 district vdc_name using "`folder'/Data/wave1_vdcs.dta"

// Define the full sample
gen full_sample = 1

// Define the potential endline sample: records whose district/VDC is not in the wave 1 VDC list
gen endline_sample = _merge == 1
drop _merge

merge m:1 www hh using "`folder'/Data/baseline_flag_hh", keep(3) nogen

/* Create variables */
// Indicators for a z-score below -2. Stata evaluates (missing < -2) as false,
// so without the -if- a child with no z-score would be coded 0 (not
// underweight/stunted/wasted); leave the indicator missing instead.
gen underweight = _zwei < -2 if !missing(_zwei)
gen stunted = _zlen < -2 if !missing(_zlen)
gen wasted = _zwfl < -2 if !missing(_zwfl)
// Age in years from age in months
gen child_age = child_tot_months/12

// Children aged two or under (a missing age compares as > 2 and is dropped)
keep if child_age <= 2

/* Drop outliers on expenditures/income */
drop if flag_spending == 1 | flag_missingwomen == 1
rename q06_idc idc_child

// One record per child. A stable sort keeps the first record in file order,
// so the record retained does not vary from run to run.
sort www hh idc_child, stable
by www hh idc_child: keep if _n == 1

// Define the endline analysis sample as those who match to having endline data
merge 1:m www hh idc_child using "`folder'/Data/child_endline_reg_data", keepusing(www hh idc_child flag)
gen endline_data = _merge == 3 & flag == 0
drop if _merge == 2
drop _merge flag

// The 1:m merge can repeat a child; the copies differ only in endline_data.
// Keep the row with the largest value (1 if any matched record is unflagged)
// rather than an arbitrary one.
bysort www hh idc_child (endline_data): keep if _n == _N

// Define the follow-up analysis sample as those who match to having follow-up data
merge 1:m www hh idc_child using "`folder'/Data/child_follow_up_reg_data", keepusing(www hh idc_child flag)
gen follow_up_data = _merge == 3 & flag == 0 & endline_sample == 1
drop if _merge == 2
drop _merge flag

// Same de-duplication after the follow-up merge
bysort www hh idc_child (follow_up_data): keep if _n == _N

tab endline_data
tab follow_up_data

local sumvars "child_age underweight stunted wasted"
keep www hh control_vdc info_vdc cash_vdc vdc_code co_code district treatment info_control cash_control cash_info endline_data follow_up_data `sumvars'

// Survey design: vdc_code as the sampling unit, stratified by district
svyset vdc_code, strata(district)

label variable child_age "Child Age"
label variable underweight "Child Underweight"
label variable stunted "Child Stunted"
label variable wasted "Child Wasted"

foreach wave in endline follow_up {


	// Output file number: table 1 for the endline sample, A2 for follow-up
	if "`wave'" == "endline" local tblnum "1"
	if "`wave'" == "follow_up" local tblnum "A2"
	
	// Means by treatment arm (treatment == 1, 2, 3)
	estpost su `sumvars' if treatment == 1 & `wave'_data == 1
	est store A

	estpost su `sumvars' if treatment == 2 & `wave'_data == 1
	est store B

	estpost su `sumvars' if treatment == 3 & `wave'_data == 1
	est store C

		esttab A B C using "`folder'/Output/table`tblnum'_baselinebalance_`wave'.tex", append ///
			nomtitles ///
			cells((mean(fmt(2)))) noobs  label booktabs nonum collabels(none) gaps f plain   

			
	// p-values from regressing each variable on the comparison indicator
	mysvyregress `sumvars' if `wave'_data == 1, by(info_control)
	est store E

	mysvyregress `sumvars' if `wave'_data == 1, by(cash_control)
	est store F

		esttab E F using "`folder'/Output/table`tblnum'_baselinebalance_pvals_`wave'.tex", append ///
			nomtitles ///
			cells(d_p(par fmt(2))) noobs  label booktabs nonum collabels(none) gaps f plain   

	tab treatment if `wave'_data == 1
}
/* Household Variables:
   1. Total HH Members
   2. HH Head Male
   3. Electricity
   4. Stone Roof
   5. Annual Income
   6. Monthly Expenditures

   Built from the women file reduced to one row per household, then appended
   to the balance tables. The sample indicators come from the tempfile
   `hh_data_matching' saved by the women section. */

use "`folder'/Data/baseline_raw_women", clear

merge m:1 district vdc_name using "`folder'/Data/wave1_vdcs.dta"

// Define the full sample
generate full_sample = 1

// Define the potential endline sample (district/VDC not in the wave 1 VDC list)
generate endline_sample = (_merge == 1)
drop _merge

// Attach the household-level characteristics, keeping matched records only
merge m:1 www hh using "`folder'/Data/baseline_raw_hh", nogen keep(3) keepusing(reg_electricity roof num_hh_members hh_head_male income_total expend_total)

// Reduce to one row per household before the 1:1 merges below
bysort www hh: keep if _n == 1
merge 1:1 www hh using "`folder'/Data/baseline_flag_hh", nogen keep(3)

/* Create variables */
generate has_electricity = (reg_electricity == 1)
generate stone_roof = (roof == 6)

/* Drop outliers on expenditures/income */
drop if flag_spending == 1 | flag_missingwomen == 1

// Bring in the household-level sample indicators; every household must match
merge 1:1 www hh using "`hh_data_matching'", keepusing (www hh endline_data follow_up_data)
assert _merge == 3

keep www hh control_vdc info_vdc cash_vdc vdc_code co_code district treatment info_control cash_control cash_info endline_data follow_up_data num_hh_members hh_head_male has_electricity stone_roof income_total expend_total

// Survey design: vdc_code as the sampling unit, stratified by district
svyset vdc_code, strata(district)

label variable num_hh_members "Number Household Members"
label variable hh_head_male "Household Head Male"
label variable has_electricity "Has Electricity"
label variable stone_roof "Stone Roofing Material"
label variable income_total "Annual Income"
label variable expend_total "Monthly Expenditures"


foreach wave in endline follow_up {

	// Output file number: table 1 for the endline sample, A2 for follow-up
	if "`wave'" == "endline" local tblnum "1"
	if "`wave'" == "follow_up" local tblnum "A2"

	// Two variable groups are appended in turn: household characteristics
	// (means shown with 2 decimals), then income/expenditures (0 decimals).
	forvalues grp = 1/2 {

		if `grp' == 1 {
			local sumvars "num_hh_members hh_head_male has_electricity stone_roof"
			local meanfmt 2
		}
		else {
			local sumvars "income_total expend_total"
			local meanfmt 0
		}

		// Means by treatment arm, stored as A (1), B (2) and C (3)
		local arm 0
		foreach est in A B C {
			local ++arm
			estpost su `sumvars' if treatment == `arm' & `wave'_data == 1
			est store `est'
		}

		esttab A B C using "`folder'/Output/table`tblnum'_baselinebalance_`wave'.tex", append ///
			nomtitles ///
			cells((mean(fmt(`meanfmt')))) noobs  label booktabs nonum collabels(none) gaps f plain

		// p-values from regressing each variable on the comparison indicator
		mysvyregress `sumvars' if `wave'_data == 1, by(info_control)
		est store E

		mysvyregress `sumvars' if `wave'_data == 1, by(cash_control)
		est store F

		esttab E F using "`folder'/Output/table`tblnum'_baselinebalance_pvals_`wave'.tex", append ///
			nomtitles ///
			cells(d_p(par fmt(2))) noobs  label booktabs nonum collabels(none) gaps f plain
	}

	tab treatment if `wave'_data == 1

}

